RGB and RGB-D Fusion

Code borrowed from https://github.com/jiangyao-scu/JL-DCF-pytorch, with some added tips.
transformer
Author

Bowen

Published

December 3, 2021

import mmcv
import matplotlib.pyplot as plt
from fastcore.basics import *
from fastai.vision.all import *
from fastai.torch_basics import *
import warnings
warnings.filterwarnings("ignore")
import kornia
from kornia.constants import Resample
from kornia.color import *
from kornia import augmentation as K
import kornia.augmentation as F
import kornia.augmentation.random_generator as rg
from torchvision.transforms import functional as tvF
from torchvision.transforms import transforms
from torchvision.transforms import PILToTensor
from functools import partial
from timm.models.layers import trunc_normal_, DropPath
from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD
from timm.models.vision_transformer import _cfg
from einops import rearrange
from timm.models.registry import register_model

# Seed the random number generators through fastai's set_seed helper.
set_seed(105)

dataset overview

# Dataset root. The sub-folders are derived from it so the location is
# configured in exactly one place.
rootPath = Path('/home/ubuntu/sharedData/swp/dlLab/fastaiRepository/fastai/data/SOD/RGBDcollection/')
rgbPath = rootPath / 'LR'     # RGB images
depPath = rootPath / 'depth'  # depth maps
gtPath = rootPath / 'GT'      # ground-truth masks
# Collect every image file found under each folder.
rgbFiles = get_image_files(rgbPath)
depFiles = get_image_files(depPath)
lblFiles = get_image_files(gtPath)
rgbFiles[0]
depFiles[0]
lblFiles[0]
Path('/home/ubuntu/sharedData/swp/dlLab/fastaiRepository/fastai/data/SOD/RGBDcollection/LR/001477_left_1_ori.jpg')
Path('/home/ubuntu/sharedData/swp/dlLab/fastaiRepository/fastai/data/SOD/RGBDcollection/depth/10_01-16-36_0_Depth.png')
Path('/home/ubuntu/sharedData/swp/dlLab/fastaiRepository/fastai/data/SOD/RGBDcollection/GT/9_07-38-26_0_GT.png')
# torchvision converters between PIL images and tensors.
to_tensor = transforms.ToTensor()
to_pil = transforms.ToPILImage()
# Open the first file of each list for a quick look at the data.
# NOTE(review): the first entries of the three lists have different file
# names, so these three images do not appear to belong to the same sample;
# confirm how files should be paired before using them together.
rgbImage = Image.open(rgbFiles[0])
lblImage = Image.open(lblFiles[0])
depImage = Image.open(depFiles[0])
# Convert each image to a tensor with fastai's image2tensor.
rgbTensor = image2tensor(rgbImage)
lblTensor = image2tensor(lblImage)
depTensor = image2tensor(depImage)
rgbImage

depImage
depImage.shape

depTensor.shape
torch.unique(depTensor)
len(torch.unique(depTensor))
torch.Size([3, 480, 640])
tensor([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,
         14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,
         28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,  41,
         42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,
         56,  57,  58,  59,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,
         70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,  81,  82,  83,
         84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,  97,
         98,  99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
        112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125,
        126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139,
        140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153,
        154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167,
        168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181,
        182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195,
        196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209,
        210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
        224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237,
        238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
        252, 253, 254, 255], dtype=torch.uint8)
256
depFiles[0]
Path('/home/ubuntu/sharedData/swp/dlLab/fastaiRepository/fastai/data/SOD/RGBDcollection/depth/10_01-16-36_0_Depth.png')
import cv2
# Flag -1 is cv2.IMREAD_UNCHANGED: the file is decoded as stored, without
# forcing a colour conversion.
im = cv2.imread('/home/ubuntu/sharedData/swp/dlLab/fastaiRepository/fastai/data/SOD/RGBDcollection/depth/10_01-16-36_0_Depth.png',-1)
# cv2.imshow("test",im)
Attention: reading the depth image with OpenCV gives an array with 3 channels, whereas reading it with PIL may give only 1 channel.

# Copy the OpenCV image into a float32 array so later arithmetic is done in
# floating point.
in_ = np.array(im, dtype=np.float32)
in_
# Attention: reading the depth image with OpenCV gives an array with 3
# channels, whereas reading it with PIL may give only 1 channel.
in_.shape
# Number of distinct intensity values present in the image.
len(np.unique(in_))
array([[[172., 172., 172.],
        [172., 172., 172.],
        [172., 172., 172.],
        ...,
        [205., 205., 205.],
        [205., 205., 205.],
        [205., 205., 205.]],

       [[172., 172., 172.],
        [172., 172., 172.],
        [172., 172., 172.],
        ...,
        [205., 205., 205.],
        [205., 205., 205.],
        [205., 205., 205.]],

       [[171., 171., 171.],
        [171., 171., 171.],
        [171., 171., 171.],
        ...,
        [205., 205., 205.],
        [205., 205., 205.],
        [205., 205., 205.]],

       ...,

       [[135., 135., 135.],
        [127., 127., 127.],
        [108., 108., 108.],
        ...,
        [249., 249., 249.],
        [249., 249., 249.],
        [249., 249., 249.]],

       [[134., 134., 134.],
        [127., 127., 127.],
        [108., 108., 108.],
        ...,
        [249., 249., 249.],
        [249., 249., 249.],
        [249., 249., 249.]],

       [[134., 134., 134.],
        [125., 125., 125.],
        [108., 108., 108.],
        ...,
        [249., 249., 249.],
        [249., 249., 249.],
        [249., 249., 249.]]], dtype=float32)
(480, 640, 3)
256
def Normalization(image):
    """Convert a BGR image to RGB and standardise it per channel.

    image: array indexed as (row, column, channel) with values on a 0-255
        scale and the channels in OpenCV's BGR order.

    Returns a new array (the input is left untouched) whose channels are in
    RGB order, scaled to 0-1 and then standardised with the per-channel
    mean and standard deviation constants below.
    """
    channel_mean = np.array((0.485, 0.456, 0.406))
    channel_std = np.array((0.229, 0.224, 0.225))

    # Reverse the last axis: cv2 loads images as BGR, the constants are RGB.
    scaled = image[:, :, ::-1] / 255.0

    # Write back into `scaled` so its dtype is kept (float32 stays float32).
    np.subtract(scaled, channel_mean, out=scaled)
    np.divide(scaled, channel_std, out=scaled)
    return scaled
# Small check that in-place subtraction and division against another
# float32 array behave element-wise: (1 - 2) / 2 == -0.5.
temp2 = np.full(3, 2, dtype=np.float32)
temp = np.ones(3, dtype=np.float32)
np.subtract(temp, temp2, out=temp)
np.divide(temp, temp2, out=temp)
temp
array([-0.5, -0.5, -0.5], dtype=float32)
# The original author uses the following functions to read both the RGB image and the depth image.
def load_image(path,image_size):
    """Read an image with OpenCV, resize it to a square and normalise it.

    path: image file to read; a str or a pathlib.Path.
    image_size: side length, in pixels, of the square output.

    Returns the array produced by ``Normalization`` for the resized image,
    with shape (image_size, image_size, 3).

    Raises OSError if OpenCV cannot read the file.
    """
    # str() lets callers pass pathlib.Path objects as well as plain strings.
    im = cv2.imread(str(path))
    # cv2.imread reports failure by returning None instead of raising; fail
    # here with a clear message rather than later inside cv2.resize.
    if im is None:
        raise OSError(f"cv2.imread could not read image: {path}")
    in_ = np.array(im, dtype=np.float32)
    in_ = cv2.resize(in_, (image_size, image_size))
    return Normalization(in_)
def load_sal_label(path,image_size):
    """Read a label mask as grayscale, resize it and scale it by 1/255.

    path: mask file to read; a str or a pathlib.Path.
    image_size: side length, in pixels, of the square output.

    Returns a float32 array of shape (image_size, image_size, 1).

    Raises OSError if OpenCV cannot read the file.
    """
    # str() lets callers pass pathlib.Path objects as well as plain strings.
    im = cv2.imread(str(path), cv2.IMREAD_GRAYSCALE)
    # cv2.imread reports failure by returning None instead of raising; fail
    # here with a clear message rather than later inside cv2.resize.
    if im is None:
        raise OSError(f"cv2.imread could not read label: {path}")
    label = np.array(im, dtype=np.float32)
    label = cv2.resize(label, (image_size, image_size))
    label = label / 255.0
    # Append a trailing channel axis: (H, W) -> (H, W, 1).
    return label[..., np.newaxis]
rgbFiles[0]
Path('/home/ubuntu/sharedData/swp/dlLab/fastaiRepository/fastai/data/SOD/RGBDcollection/LR/001477_left_1_ori.jpg')

Slicing a three-dimensional array


import numpy as np

# Demo array of shape (3, 3, 4) holding the integers 1..36 in row-major order.
b = np.arange(1, 37).reshape(3, 3, 4)


def _show(label, view):
    """Print the slicing expression, the resulting array and its shape."""
    print(label, view, view.shape)


print(f'b is {b}')
print(b.shape)

# Reversing the last axis.
_show("b[:, :, ::-1]", b[:, :, ::-1])

# Indexing / slicing the first axis.
print("b[0, ::],b[1, ::],b[-1, ::],b[0:2, ::]")
_show("b[0, ::]", b[0, ::])
_show("b[1, ::]", b[1, ::])
_show("b[-1, ::]", b[-1, ::])
_show("b[0:2, ::]", b[0:2, ::])

# Slicing the second axis (the axis is kept).
print("b[:, 0:],b[:, 1:],b[:, -1:],b[:, 0:2:]")
_show("b[:, 0:]", b[:, 0:])
_show("b[:, 1:]", b[:, 1:])
_show("b[:, -1:]", b[:, -1:])
_show("b[:, 0:2:]", b[:, 0:2:])

# Indexing the second axis (the axis is dropped).
print("b[::, 0],b[::, 1],b[::, -1],b[::, 0:2:]")
_show("b[::, 0]", b[::, 0])
_show("b[::, 1]", b[::, 1])
_show("b[::, -1]", b[::, -1])
_show("b[::, 0:2:]", b[::, 0:2:])

# Indexing the third axis.
print("b[:,:, 0],b[:,:, 1],b[:,:, -1],b[:,:, 0:2:]")
_show("b[:, :, 0]", b[:, :, 0])
_show("b[:, :, 1]", b[:, :, 1])
_show("b[:, :, -1]", b[:, :, -1])
b is [[[ 1  2  3  4]
  [ 5  6  7  8]
  [ 9 10 11 12]]

 [[13 14 15 16]
  [17 18 19 20]
  [21 22 23 24]]

 [[25 26 27 28]
  [29 30 31 32]
  [33 34 35 36]]]
(3, 3, 4)
b[:, :, ::-1] [[[ 4  3  2  1]
  [ 8  7  6  5]
  [12 11 10  9]]

 [[16 15 14 13]
  [20 19 18 17]
  [24 23 22 21]]

 [[28 27 26 25]
  [32 31 30 29]
  [36 35 34 33]]] (3, 3, 4)
b[0, ::],b[1, ::],b[-1, ::],b[0:2, ::]
b[0, ::] [[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]] (3, 4)
b[1, ::] [[13 14 15 16]
 [17 18 19 20]
 [21 22 23 24]] (3, 4)
b[-1, ::] [[25 26 27 28]
 [29 30 31 32]
 [33 34 35 36]] (3, 4)
b[0:2, ::] [[[ 1  2  3  4]
  [ 5  6  7  8]
  [ 9 10 11 12]]

 [[13 14 15 16]
  [17 18 19 20]
  [21 22 23 24]]] (2, 3, 4)
b[:, 0:],b[:, 1:],b[:, -1:],b[:, 0:2:]
b[:, 0:] [[[ 1  2  3  4]
  [ 5  6  7  8]
  [ 9 10 11 12]]

 [[13 14 15 16]
  [17 18 19 20]
  [21 22 23 24]]

 [[25 26 27 28]
  [29 30 31 32]
  [33 34 35 36]]] (3, 3, 4)
b[:, 1:] [[[ 5  6  7  8]
  [ 9 10 11 12]]

 [[17 18 19 20]
  [21 22 23 24]]

 [[29 30 31 32]
  [33 34 35 36]]] (3, 2, 4)
b[:, -1:] [[[ 9 10 11 12]]

 [[21 22 23 24]]

 [[33 34 35 36]]] (3, 1, 4)
b[:, 0:2:] [[[ 1  2  3  4]
  [ 5  6  7  8]]

 [[13 14 15 16]
  [17 18 19 20]]

 [[25 26 27 28]
  [29 30 31 32]]] (3, 2, 4)
b[::, 0],b[::, 1],b[::, -1],b[::, 0:2:]
b[::, 0] [[ 1  2  3  4]
 [13 14 15 16]
 [25 26 27 28]] (3, 4)
b[::, 1] [[ 5  6  7  8]
 [17 18 19 20]
 [29 30 31 32]] (3, 4)
b[::, -1] [[ 9 10 11 12]
 [21 22 23 24]
 [33 34 35 36]] (3, 4)
b[::, 0:2:] [[[ 1  2  3  4]
  [ 5  6  7  8]]

 [[13 14 15 16]
  [17 18 19 20]]

 [[25 26 27 28]
  [29 30 31 32]]] (3, 2, 4)
b[:,:, 0],b[:,:, 1],b[:,:, -1],b[:,:, 0:2:]
b[:, :, 0] [[ 1  5  9]
 [13 17 21]
 [25 29 33]] (3, 3)
b[:, :, 1] [[ 2  6 10]
 [14 18 22]
 [26 30 34]] (3, 3)
b[:, :, -1] [[ 4  8 12]
 [16 20 24]
 [28 32 36]] (3, 3)
load_image('/home/ubuntu/sharedData/swp/dlLab/fastaiRepository/fastai/data/SOD/RGBDcollection/depth/10_01-16-36_0_Depth.png',512).shape
(512, 512, 3)

add some fog

# modification of https://github.com/FLHerne/mapgen/blob/master/diamondsquare.py
def plasma_fractal(mapsize=256, wibbledecay=3):
    """
    Generate a heightmap using the diamond-square algorithm.

    mapsize: side length of the square map; must be a positive power of two.
    wibbledecay: factor by which the random perturbation amplitude is
        divided after each refinement pass.

    Return a square 2d float64 array, side length 'mapsize', min-max
    normalised to the range 0-1. Random values are drawn from NumPy's global
    random state.

    Raise ValueError if 'mapsize' is not a positive power of two.
    """
    # Explicit check instead of `assert`, which is stripped under `python -O`
    # and would also let mapsize == 0 through.
    if mapsize < 1 or mapsize & (mapsize - 1):
        raise ValueError(f"mapsize must be a positive power of two, got {mapsize}")

    # np.float64 instead of the np.float_ alias, which NumPy 2.0 removed.
    maparray = np.empty((mapsize, mapsize), dtype=np.float64)
    maparray[0, 0] = 0
    stepsize = mapsize
    wibble = 100

    def wibbledmean(array):
        # Mean of the four accumulated neighbours plus a random perturbation
        # whose amplitude depends on the current value of `wibble`.
        return array / 4 + wibble * np.random.uniform(-wibble, wibble, array.shape)

    def fillsquares():
        """For each square of points stepsize apart,
           calculate middle value as mean of points + wibble"""
        cornerref = maparray[0:mapsize:stepsize, 0:mapsize:stepsize]
        # np.roll wraps around, so the map is treated as periodic.
        squareaccum = cornerref + np.roll(cornerref, shift=-1, axis=0)
        squareaccum += np.roll(squareaccum, shift=-1, axis=1)
        maparray[stepsize // 2:mapsize:stepsize,
        stepsize // 2:mapsize:stepsize] = wibbledmean(squareaccum)

    def filldiamonds():
        """For each diamond of points stepsize apart,
           calculate middle value as mean of points + wibble"""
        drgrid = maparray[stepsize // 2:mapsize:stepsize, stepsize // 2:mapsize:stepsize]
        ulgrid = maparray[0:mapsize:stepsize, 0:mapsize:stepsize]
        ldrsum = drgrid + np.roll(drgrid, 1, axis=0)
        lulsum = ulgrid + np.roll(ulgrid, -1, axis=1)
        ltsum = ldrsum + lulsum
        maparray[0:mapsize:stepsize, stepsize // 2:mapsize:stepsize] = wibbledmean(ltsum)
        tdrsum = drgrid + np.roll(drgrid, 1, axis=1)
        tulsum = ulgrid + np.roll(ulgrid, -1, axis=0)
        ttsum = tdrsum + tulsum
        maparray[stepsize // 2:mapsize:stepsize, 0:mapsize:stepsize] = wibbledmean(ttsum)

    # Refine the grid, halving the spacing and shrinking the perturbation
    # after every pass, until every cell has been filled.
    while stepsize >= 2:
        fillsquares()
        filldiamonds()
        stepsize //= 2
        wibble /= wibbledecay

    maparray -= maparray.min()
    peak = maparray.max()
    if peak == 0:
        # Constant map (e.g. mapsize == 1): return zeros instead of 0/0 = NaN.
        return maparray
    return maparray / peak
def fog(x, severity=1, channels_first=None):
    """Blend a plasma-fractal "fog" pattern into an image.

    x: image values on a 0-255 scale; anything ``np.array`` accepts. May be
        2-D (H, W), channels-last (..., H, W, C) or channels-first
        (..., C, H, W). Any spatial size is supported.
    severity: integer 1-5 selecting the fog strength and the fractal decay.
    channels_first: layout of inputs with 3 or more dimensions. ``None``
        (default) guesses: the input is treated as channels-first only when
        the third-from-last axis has at most 4 entries and the last axis has
        more than 4; every other input is treated as channels-last. Pass
        True/False to override the guess.

    Returns a float NumPy array with the same shape as ``x``, on a 0-255
    scale.

    Raises ValueError if ``severity`` is outside 1-5 or ``x`` has fewer than
    two dimensions.
    """
    levels = [(1.5, 2), (2, 2), (2.5, 1.7), (2.5, 1.5), (3, 1.4)]
    # Reject out-of-range values explicitly; severity=0 would otherwise wrap
    # around to the last (strongest) entry via negative indexing.
    if not 1 <= severity <= len(levels):
        raise ValueError(f"severity must be between 1 and {len(levels)}, got {severity}")
    c = levels[severity - 1]

    x = np.array(x) / 255.
    if x.ndim < 2:
        raise ValueError(f"expected an image with at least 2 dimensions, got shape {x.shape}")

    if x.ndim == 2:
        channels_first = True  # no channel axis: the map is added as-is
    elif channels_first is None:
        channels_first = x.shape[-3] <= 4 < x.shape[-1]

    if channels_first:
        height, width = x.shape[-2:]
    else:
        height, width = x.shape[-3:-1]

    # plasma_fractal needs a power-of-two side: use the smallest one that
    # covers the image (256 for a 224x224 input), then crop to the image.
    side = max(2, 1 << (max(height, width) - 1).bit_length())
    haze = plasma_fractal(mapsize=side, wibbledecay=c[1])[:height, :width]
    if not channels_first:
        # Trailing axis so the same map broadcasts over every channel.
        haze = haze[..., np.newaxis]

    max_val = x.max()  # measured before the fog is added
    x += c[0] * haze
    # Rescale so the brightest original value stays in range, then go back
    # to the 0-255 scale.
    return np.clip(x * max_val / (max_val + c[0]), 0, 1) * 255
rgbTensor.shape
torch.Size([3, 496, 869])
fog(rgbTensor,severity=3)
> /tmp/ipykernel_6080/425086914.py(5)fog()
      3     set_trace()
      4 
----> 5     x = np.array(x) / 255.
      6     max_val = x.max()
      7     x += c[0] * plasma_fractal(wibbledecay=c[1])[:224, :224][..., np.newaxis]

ipdb> n
> /tmp/ipykernel_6080/425086914.py(6)fog()
      4 
      5     x = np.array(x) / 255.
----> 6     max_val = x.max()
      7     x += c[0] * plasma_fractal(wibbledecay=c[1])[:224, :224][..., np.newaxis]
      8     return np.clip(x * max_val / (max_val + c[0]), 0, 1) * 255

ipdb> x.shape
(3, 496, 869)
ipdb> c[0]
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
/tmp/ipykernel_6080/3169706399.py in <module>
----> 1 fog(rgbTensor,severity=3)

/tmp/ipykernel_6080/425086914.py in fog(x, severity)
      4 
      5     x = np.array(x) / 255.
----> 6     max_val = x.max()
      7     x += c[0] * plasma_fractal(wibbledecay=c[1])[:224, :224][..., np.newaxis]
      8     return np.clip(x * max_val / (max_val + c[0]), 0, 1) * 255

ValueError: operands could not be broadcast together with shapes (3,496,869) (224,224,1) (3,496,869)